/* This do-file extracts the relevant variables from the 2001 Census (10% sample) */ 

****************
** Census 2001** 
****************

* Load the person-level file and attach the household-level variables to
* every person record, matching on the household serial number sn.
cd "" /*input directory here with raw data */

use "SA Census 2001 Person_v1.1_20111024.dta", clear

sort sn 

* keep(master match): a household record with no person in the person file
* would otherwise be appended as an observation with every person-level
* variable missing, and would later be counted as a one-member household
* when household size is computed from the number of rows per household.
* Person records without a household record are still kept.
merge m:1 sn using "SA Census 2001 Hhold_v1.1_20111024.dta", keep(master match)


* Give the raw census variables descriptive names. The renames are
* independent of each other and are grouped by theme.

* Identifiers
rename p01_pno   id
rename sn        household_id
rename p14a_mno  mother_id
rename p05a_spo  spouse_id

* Demographics
rename p02_age   age
rename p02_yr    birth_year
rename p02_mth   birth_month
rename p03_sex   sex
rename p05_mar   marital_status
rename p06_race  race
rename p09_rsa   born_south_africa
rename p09a_prv  province_birth

* Education
rename der27_ed  education
rename p17_educ  edu_specific

* Labour market
rename der10_em  emp_status
rename p19_wsta  work_status
rename p19b_ind  industry
rename p19c_occ  occupation

* Income
rename p22_incm  income_category
rename der16_hh  hh_income

* Fertility (the yc_ prefix marks the three birth-date components)
rename p20_tceb  number_children
rename p20b_la   child_alive
rename p20b_lda  yc_birth_day
rename p20b_lmt  yc_birth_month
rename p20b_lye  yc_birth_year

* Geography
rename pr_code   province
rename dc_munic  district
rename munic_co  municipality
rename EATYPE96  geotype

* Sampling weight
* NOTE(review): this renames weight to itself and looks redundant;
* confirm and remove.
rename weight    weight

* Indicator equal to 1 whenever p16_stud differs from 1.
* Intended meaning per the original author: person attends any type of
* educational institution.
* NOTE(review): a missing p16_stud also satisfies != 1 and is therefore
* coded 1; confirm this is intended.
generate school_attend = (p16_stud != 1)


* Keep only the renamed person variables plus the raw household-asset
* variables that are recoded further down; everything else is dropped.
* Line continuations are used in place of a bare "#delimit" (the documented
* forms are "#delimit ;" and "#delimit cr"), so the command delimiter is
* never switched.
keep geotype district province* occupation industry education ///
     marital_status sex age household_id spouse_id ///
     yc_birth_year yc_birth_month yc_birth_day ///
     emp_status race weight work_status child_alive income_category ///
     number_children mother_id id birth_year born_south_africa ///
     municipality school_attend birth_month edu_specific hh_income ///
     h23a_hu h29_frid h29_radi h29_tv h29_comp h29_cell ///
     h25_tenu h24_room h26_pipe h28c_lgh h27_toil


**********************
/* 1-digit industry */ 
**********************

* Reduce the detailed industry code to its leading digit.
* Code 0 becomes missing; any remaining code below 100 is set to 0 and
* ends up in category 10 after the leading digit is taken.
replace industry = . if industry == 0
replace industry = 0 if industry < 100

* industry2 is the string version of the code after the two replacements
* above; it is not dropped and therefore stays in the data.
tostring industry, generate(industry2)
drop industry
generate industry = substr(industry2, 1, 1)
destring industry, replace
replace industry = 10 if industry == 0

label define industry ///
    1  "Agriculture; hunting; forestry and fishing" ///
    2  "Mining and quarrying" ///
    3  "Manufacturing" ///
    4  "Electricity; gas and water supply" ///
    5  "Construction" ///
    6  "Wholesale and retail trade" ///
    7  "Transport; storage and communication" ///
    8  "Financial; insurance; real estate and business services" ///
    9  "Community; social and personal services" ///
    10 "Private Households&Other"
label values industry industry

************************
/* 1-digit occupation */ 
************************

* Reduce the detailed occupation code to its leading digit and keep the
* detailed code as occ_specific. Codes 0 and 998 are set to missing first.
replace occupation = . if inlist(occupation, 0, 998)

* The string copy supplies the leading digit and is then kept, converted
* back to numeric, as occ_specific.
tostring occupation, generate(occupation2)
drop occupation
generate occupation = substr(occupation2, 1, 1)
rename occupation2 occ_specific
destring occupation occ_specific, replace

label define occupation1 ///
    1 "Legislators; senior officials and managers" ///
    2 "Professionals" ///
    3 "Technicians and associate professionals" ///
    4 "Clerks" ///
    5 "Service workers; shop and market sales workers" ///
    6 "Skilled agricultural and fishery workers" ///
    7 "Craft and related trades workers" ///
    8 "Plant and machine operators and assemblers" ///
    9 "Elementary occupations"
label values occupation occupation1

***********************
/* Employment Status */ 
***********************

* Four-category employment status built from emp_status and work_status.
* The conditions are tested from highest to lowest precedence, which
* reproduces a chain of overwrites in which the later rule wins:
*   4 if emp_status is 0 or 3
*   3 if emp_status is 2
*   2 if work_status is 3 or 4
*   1 if emp_status is 1
* Observations matching none of the rules are left missing.
generate status = cond(inlist(emp_status, 0, 3), 4, ///
                  cond(emp_status == 2, 3, ///
                  cond(inlist(work_status, 3, 4), 2, ///
                  cond(emp_status == 1, 1, .))))

label define status 1 "Employee" 2 "Self-Employed" 3 "Unemployed" 4 "Inactive"
label values status status

***********************
* Years of Education ** 
***********************

* Map the detailed education code to years of schooling.
* Codes 1-12 are shifted up by one, 13 and 14 map to 11, 15-20 map to 14,
* and 99 maps to 0. Codes not listed in the recode are left unchanged.
* NOTE(review): the variable label says "Grade 0=1", but no rule below
* produces the value 1 (code 1 becomes 2); confirm against the codebook.
generate yrs_schooling = edu_specific

recode yrs_schooling ///
    (99 = 0) ///
    (1 = 2) (2 = 3) (3 = 4) (4 = 5) (5 = 6) (6 = 7) ///
    (7 = 8) (8 = 9) (9 = 10) (10 = 11) (11 = 12) (12 = 13) ///
    (13 = 11) (14 = 11) ///
    (15/20 = 14)
label variable yrs_schooling "Years of Schooling (Careful: Grade 0=1, and 14 indicates any post high school studies)"

* Anything outside 0-14 after recoding is set to missing.
replace yrs_schooling = . if !inrange(yrs_schooling, 0, 14)


*******************
* Household Assets* 
*******************

* Household asset and dwelling indicators. Every indicator built with an
* equality test is 1 when the raw variable has the listed code and 0
* otherwise, so a missing raw value is coded 0.
* NOTE(review): confirm that treating missing as 0 is intended.

* House Type *

generate house_type = (h23a_hu == 1)
label define house_type 0 "Other" 1 "House or Brick Structure"
label values house_type house_type
drop h23a_hu

** Durable goods: fridge, radio, TV, computer, cell phone **
* One indicator per raw item (1 if the raw item equals 1); the raw item is
* dropped once its indicator exists.

local raw_items  h29_frid h29_radi h29_tv h29_comp h29_cell
local flag_names fridge   radio    tv     computer cell_phone
forvalues k = 1/5 {
    local src : word `k' of `raw_items'
    local dst : word `k' of `flag_names'
    generate `dst' = (`src' == 1)
    drop `src'
}

** House Tenure **
* Code 9 is set to missing.

rename h25_tenu hh_tenure
replace hh_tenure = . if hh_tenure == 9

** House Rooms **
* Codes 98, 99 and 999 are set to missing.

rename h24_room hh_rooms
recode hh_rooms (98 99 999 = .)

** Water in dwelling **
* NOTE(review): only code 5 of h26_pipe counts as a water connection;
* confirm against the codebook.

generate hh_water = (h26_pipe == 5)
label variable hh_water "Dwelling with water connection"
drop h26_pipe

** Electricity **

generate electricity = (h28c_lgh == 1)
label variable electricity "Electricity source for lighting"
drop h28c_lgh

** Toilet **
* Codes 1, 2 and 3 of h27_toil are coded 1.

generate h_toilet = inlist(h27_toil, 1, 2, 3)
label variable h_toilet "1 if toilet, 0 if latrine or nothing"
drop h27_toil


*******************
/*Household Size */ 
*******************

* Household size: number of observations sharing a household_id, obtained
* by totalling a constant 1 within each household. total() is the
* documented egen function for group totals; the older name sum() is no
* longer documented. The helper variable ind is left in the saved data.
gen ind=1 
bysort household_id: egen household_size=total(ind)

gen year=2001 /* year variable */ 


* Write the extract to the current working directory, overwriting any
* existing file of the same name.
save south_africa_census_2001_wber.dta, replace  
